{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "一个简单的例子:y=2XTX,(y'=4X）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([0., 1., 2., 3.])"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import torch\n",
    "x=torch.arange(4.0)\n",
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "None\n"
     ]
    }
   ],
   "source": [
    "\"\"\"在 PyTorch 中，x.requires_grad = True 是用于设置张量 x 的一个属性，使得该张量在进行反向传播时能够计算梯度。\n",
    "这对于需要优化的参数非常重要，特别是在训练神经网络时。\n",
    "当你将 requires_grad 属性设置为 True 时，PyTorch 会自动记录所有对该张量的操作，以便在调用 backward() 方法时能够计算出梯度。\n",
    "这使得你可以使用优化算法（如 SGD、Adam 等）来更新模型参数。\"\"\"\n",
    "x.requires_grad=True#等于x=torch.range(4,requires_grad=True)\n",
    "print(x.grad)#没有梯度，默认None\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(28., grad_fn=<MulBackward0>)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y=2*torch.dot(x,x)\n",
    "y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([ 0.,  4.,  8., 12.])"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y.backward()#反向传播\n",
    "x.grad"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([True, True, True, True])"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x.grad==4*x#2X²导数为4X"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "计算另一个函数：y=x1+x2+x3+x4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([1., 1., 1., 1.])"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x.grad.zero_()#pytorch默认保存梯度，这里清除梯度\n",
    "y=x.sum()\n",
    "y.backward()\n",
    "x.grad#分别对四个X求得到偏导：1\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 非标量变量的反向传播"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "X=torch.arange(16,dtype=torch.float32).reshape((4,4))\n",
    "\n",
    "X.requires_grad=True\n",
    "y=X*X\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"\n",
    "对于多维张量，在进行反向传播时，通常传入 torch.ones(X.shape)，\n",
    "这样可以确保计算每个元素的梯度，并且每个元素的梯度权重都是 1。\n",
    "\"\"\"\n",
    "y.backward(torch.ones(X.shape))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[True, True, True, True],\n",
       "        [True, True, True, True],\n",
       "        [True, True, True, True],\n",
       "        [True, True, True, True]])"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X.grad==2*X"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[ 0.,  2.,  4.,  6.],\n",
       "        [ 8., 10., 12., 14.],\n",
       "        [16., 18., 20., 22.],\n",
       "        [24., 26., 28., 30.]])"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X.grad"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "分离计算：detach"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "X.grad.zero_()\n",
    "y=X*X\n",
    "u=y.detach()\n",
    "z=u*X\n",
    "z.backward(torch.ones(X.shape))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[True, True, True, True],\n",
       "        [True, True, True, True],\n",
       "        [True, True, True, True],\n",
       "        [True, True, True, True]])"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X.grad==u#u就相当于一个常数被分离出来，z=ux不是z=x三次方"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[  0.,   1.,   4.,   9.],\n",
       "        [ 16.,  25.,  36.,  49.],\n",
       "        [ 64.,  81., 100., 121.],\n",
       "        [144., 169., 196., 225.]])"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X.grad"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[  0.,   1.,   4.,   9.],\n",
       "        [ 16.,  25.,  36.,  49.],\n",
       "        [ 64.,  81., 100., 121.],\n",
       "        [144., 169., 196., 225.]])"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "u"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "python控制流(for ,if,while。。。。)的计算"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(True)"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def f(num):\n",
    "    b=num*2\n",
    "    while b.norm()<1000:\n",
    "        b=b*2\n",
    "    if b.sum()>0:\n",
    "        c=b\n",
    "    else:\n",
    "        c=100*b\n",
    "    return c\n",
    "a=torch.rand(size=(),requires_grad=True)#生成一个单一的数值\n",
    "z=f(a)\n",
    "z.backward()\n",
    "\"\"\"\n",
    "我们现在可以分析上面定义的f函数。请注意，它在其输入a中是分段线性的。换言之，对于任何a，存在某个\n",
    "常量标量k，使得f(a)=k*a，其中k的值取决于输入a，因此可以用d/a验证梯度是否正确。\n",
    "\"\"\"\n",
    "a.grad==z/a"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.5"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "12cf4d0b9b7b18c55261077a6853aabe6f033db06abf1184072cd2e823f414c8"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
